#!/usr/bin/env groovy
package scripts;

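/**
 * Extracts the PubMed metadata (title, PubMed ID, abstract, authors and MeSH
 * headings) embedded in a PDF file and prints it to standard output.
 *
 * Usage: PubMedMetaData.groovy -f <inputFile>
 */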

@Grapes([
	@Grab(group='org.apache.ivy', module='ivy', version='2.2.0'),
	@Grab(group='org.apache.pdfbox', module='pdfbox', version='1.7.1'),
	@Grab(group='org.biogroovy', module='biogroovy', version='1.1')
])
import org.apache.pdfbox.pdmodel.common.*;
import org.apache.pdfbox.pdmodel.*;
import org.biogroovy.io.PDFArticleSerializer;
import org.biogroovy.models.Article;

// Command-line definition:
//   -h / --help       print the usage text
//   -f / --inputFile  path of the PDF whose metadata should be read
def cli = new CliBuilder(usage: 'PubMedMetaData.groovy -f <inputFile> ');
cli.with{
	h(longOpt: 'help','PubMedMetaData.groovy -f <inputFile>')
	f(longOpt: 'inputFile', args:1, argName:'file','The PDF to be processed')
}


def options = cli.parse(args)

// A falsy result means the arguments could not be parsed; nothing more to do.
if (!options){
	return;
}

// Print usage and stop when help was requested or no input file was supplied.
// Without this check a missing -f evaluates to false and the rest of the
// script would attempt to read a file literally named "false".
if (options.h || !options.f){
	cli.usage();
	return;
}


// Read the metadata from the PDF named by -f and print it as a single
// "Article [ ... ]" record on standard output.
PDFArticleSerializer serializer = new PDFArticleSerializer();

// Resolve the argument to an absolute path before prefixing the scheme, so a
// relative -f value does not leave its first path segment where a URL host
// belongs. Absolute inputs produce exactly the same string as before.
String inputUrl = "file://" + new File(options.f.toString()).absolutePath;
Article article = serializer.readMetadata(inputUrl);

println "Article [ title:${article.title}, pubmedId:${article.pubmedId}";
println "          abstract: ${article.abs}";
// NOTE(review): assumes authors/meshHeadings may be null when the PDF lacks
// that metadata - confirm against Article. Non-null values print as before.
println "          authors: ${(article.authors ?: []).join(';')}";
println "          keywords: ${(article.meshHeadings ?: []).join(';')}";
println "]"


